# Summary: Creates the descriptive-statistics tables of SI C (Tables C.1-C.4)

##############################################
#----------------- SI C ---------------------#
##############################################

# Attach the packages whose functions are called without a namespace prefix
# below (%>%, mutate, group_by, distinct, n from dplyr; drop_na from tidyr).
# fastDummies is always called as fastDummies::, so it is not attached here.
library(dplyr)
library(tidyr)

# Strongly prefer fixed over scientific notation when printing numbers
options(scipen = 999)
# Clear the global environment before loading the data
rm(list = ls())

# Set working directory
# setwd('~/Dataverse/')

# Make sure the directory the tables are written to exists; writing to a
# missing directory would otherwise abort the script at the first table.
dir.create("./SI_C_Results", showWarnings = FALSE, recursive = TRUE)

# Load datasets (expected to provide the objects `df` and `df_long` used below)
load(file = "./Data/df.RData")
load(file = "./Data/df_long.RData")

#############
# TABLE C.1 #
#############

# Table C.1: n, mean, sd, min and max of the variables selected from `df`.
tb_C1 <- df %>%
  dplyr::select(considered_running, racegender,
                political_interest,
                n_offices_qualified,
                partyID,
                encouraged,
                AGE7,
                education,
                income,
                married)

# Append one indicator column per category of the three categorical variables.
tb_C1 <- fastDummies::dummy_cols(
  tb_C1,
  select_columns = c("racegender", "encouraged", "partyID")
)

# Positional selection: drops columns 2, 5 and 6 (racegender, partyID and
# encouraged, now represented by their indicator columns) and keeps the rest
# up to column 23; rows with any missing value are then removed.
# NOTE(review): the upper bound assumes the indicator columns occupy exactly
# positions 11-23 -- confirm if the category sets ever change.
tb_C1 <- tb_C1[, c(1, 3, 4, 7, 8, 9, 10:23)] %>%
  drop_na()

# Column-wise summaries. The data frame is coerced to a matrix once (apply()
# would otherwise repeat that coercion on every call), and the summary
# functions are referenced by namespace so that no global variable named
# `mean`, `sd`, `min` or `max` is created or needed. No na.rm is required:
# drop_na() above already removed every incomplete row.
tb_C1 <- local({
  values <- as.matrix(tb_C1)
  cbind.data.frame(
    n = nrow(values),
    mean = apply(values, 2, base::mean),
    sd = apply(values, 2, stats::sd),
    min = apply(values, 2, base::min),
    max = apply(values, 2, base::max)
  )
})

# Write title and table to file. The table is printed explicitly because
# top-level auto-printing is disabled when the script is run via source(),
# and capture.output() restores the output sink even if printing fails.
capture.output(
  print("TABLE C.1: Descriptive Statistics: Models with Respondent as the Unit of Analysis"),
  print(tb_C1),
  file = "./SI_C_Results/tableC1.txt"
)

#############
# TABLE C.2 #
#############

# Table C.2: n, mean, sd, min and max of the variables selected from `df_long`.
# The original code also selected `office` and derived `office_considered`,
# but both were discarded by the positional selection below, so they are no
# longer carried along; the resulting table is unchanged.
tb_C2 <- df_long %>%
  dplyr::select(considered_running, racegender,
                political_interest,
                qualified_office,
                partyID,
                encouraged,
                AGE7,
                education,
                income,
                married)

# Append one indicator column per category of the three categorical variables.
tb_C2 <- fastDummies::dummy_cols(
  tb_C2,
  select_columns = c("racegender", "encouraged", "partyID")
)

# Positional selection: drops columns 2, 5 and 6 (racegender, partyID and
# encouraged, now represented by their indicator columns) and keeps the rest
# up to column 23; rows with any missing value are then removed.
# NOTE(review): the upper bound assumes the indicator columns occupy exactly
# positions 11-23 -- confirm if the category sets ever change.
tb_C2 <- tb_C2[, c(1, 3, 4, 7:23)] %>%
  drop_na()

# Column-wise summaries. The data frame is coerced to a matrix once (apply()
# would otherwise repeat that coercion on every call), and the summary
# functions are referenced by namespace so that no global variable named
# `mean`, `sd`, `min` or `max` is created or needed. No na.rm is required:
# drop_na() above already removed every incomplete row.
tb_C2 <- local({
  values <- as.matrix(tb_C2)
  cbind.data.frame(
    n = nrow(values),
    mean = apply(values, 2, base::mean),
    sd = apply(values, 2, stats::sd),
    min = apply(values, 2, base::min),
    max = apply(values, 2, base::max)
  )
})

# Write title and table to file. The table is printed explicitly because
# top-level auto-printing is disabled when the script is run via source(),
# and capture.output() restores the output sink even if printing fails.
capture.output(
  print("TABLE C.2: Descriptive Statistics: Model with Respondent-Office as the Unit of Analysis"),
  print(tb_C2),
  file = "./SI_C_Results/tableC2.txt"
)


#############
# TABLE C.3 #
#############

# Table C.3: for every office x racegender cell of `df_long`, the number of
# complete rows, the number with considered_running summed over the cell, and
# the corresponding proportions.
tb_C3 <- df_long %>%
  # Same variable set as Table C.2, so drop_na() keeps the same complete rows.
  dplyr::select(considered_running, racegender, office,
                political_interest,
                qualified_office,
                partyID,
                encouraged,
                AGE7,
                education,
                income,
                married) %>%
  drop_na() %>%
  # Rows per office
  group_by(office) %>%
  mutate(n_respondents_office = n()) %>%
  # Rows, sum of considered_running, and their ratio per office x racegender
  group_by(office, racegender) %>%
  mutate(n_office_racegender = n(),
         n_considered = sum(considered_running, na.rm = TRUE),
         prop_yes = n_considered / n_office_racegender) %>%
  # Collapse to one row per office x racegender cell
  dplyr::select(office, racegender, n_respondents_office,
                n_office_racegender, n_considered, prop_yes) %>%
  distinct() %>%
  # Total of n_considered across racegender groups within each office
  group_by(office) %>%
  mutate(total_considered_office = sum(n_considered)) %>%
  # Total of n_considered across offices within each racegender group.
  # NOTE(review): the ratio below divides a total taken over all offices by
  # the size of a single office x racegender cell, so it can exceed 1;
  # confirm this is the intended denominator.
  group_by(racegender) %>%
  mutate(n_racegender_considered = sum(n_considered),
         prop_racegender_considered =
           n_racegender_considered / n_office_racegender) %>%
  # Drop the grouping so it does not leak into later use or the printout
  ungroup() %>%
  mutate(prop_yes = round(prop_yes, digits = 2),
         prop_racegender_considered =
           round(prop_racegender_considered, digits = 2))

# Write title and table to file. The table is printed explicitly because
# top-level auto-printing is disabled when the script is run via source();
# n = Inf / width = Inf stop the tibble printer from cutting rows or columns
# out of the results file; capture.output() restores the output sink even if
# printing fails.
capture.output(
  print("TABLE C.3:  Descriptive Statistics: Proportion of Respondents Expressing Interest in Running for Each Office by Race and Gender"),
  print(tb_C3, n = Inf, width = Inf),
  file = "./SI_C_Results/tableC3.txt"
)


#############
# TABLE C.4 #
#############

# Table C.4: for every racegender x encouraged combination in `df_long`, the
# number of complete rows and its share of the racegender group's rows.
# NOTE(review): counts are rows of df_long, not unique respondents; the
# proportions match respondent-level ones only if every respondent contributes
# the same number of rows -- confirm.
tb_C4 <- df_long %>%
  dplyr::select(considered_running, racegender, encouraged,
                political_interest,
                n_offices_qualified,
                partyID,
                AGE7,
                education,
                income,
                married) %>%
  drop_na() %>%
  # Rows per racegender group
  group_by(racegender) %>%
  mutate(n_racegender = n()) %>%
  # Rows per encouragement category
  group_by(encouraged) %>%
  mutate(n_encouraged_type = n()) %>%
  # Rows per racegender x encouragement cell. Group on `encouraged` itself:
  # grouping on the count n_encouraged_type would merge two encouragement
  # categories that happen to have the same number of rows.
  group_by(racegender, encouraged) %>%
  mutate(n_race_encouraged = n()) %>%
  ungroup() %>%
  # Collapse to one row per racegender x encouragement cell
  dplyr::select(racegender, encouraged, n_racegender,
                n_encouraged_type, n_race_encouraged) %>%
  distinct() %>%
  mutate(prop_race_encouraged_type = n_race_encouraged / n_racegender)

# Write title and table to file. The table is printed explicitly because
# top-level auto-printing is disabled when the script is run via source();
# n = Inf / width = Inf stop the tibble printer from cutting rows or columns
# out of the results file; capture.output() restores the output sink even if
# printing fails.
capture.output(
  print("TABLE C.4: Descriptive Statistics: Proportion of Respondents Encouraged to Run for Office by Race and Gender"),
  print(tb_C4, n = Inf, width = Inf),
  file = "./SI_C_Results/tableC4.txt"
)

